*******************************************************************************************
*******************************************************************************************
******************** Reclassification Risk in the Small Group Health Insurance Market
******************* by Sebastian Fleitas, Gautam Gowrisankaran and Anthony Lo Sasso 
*******************************************************************************************
*******************************************************************************************
*******************************************************************************************
******************** Table 4 Firm Level
*******************************************************************************************
*******************************************************************************************
*******************************************************************************************

* Move to the project root and load the individual-level dataset.
cd "~/Dropbox/ReclassificationRisk/"
* -clear- is the documented -use- option for discarding any data currently in
* memory; -replace- belongs to -save-, not -use-.
use database_individual_level.dta, clear


/*
**************************************************************************************************
** BEGINNING OF USIC ESTIMATES
** TWO STEP REGRESSION USING THIS SAMPLE (ROBUSTNESS ON LEAVING THE SAMPLE) **
**************************************************************************************************
tsset mbr_sys_id year
egen stderrorscluster = group(customer_number year)
gen SEX = (gdr_cd=="F") 
gen industry = substr(sic_cd,1,1)
tab industry, gen(industy_dummy)
**************************************************************************************************
local conditions "lagged_code_hypertension lagged_code_heartfailure lagged_chronic_ami laggeed_code_respfailure  lagged_code_brainhemorr lagged_chronic_cancer lagged_chronic_diabetes lagged_code_asthma" 
mkspline reg16 1 reg17 = mean_pred_riskscore_rp 
*if year==2015 /*making the spline on the delta for 2015, cross section estimate */
*gen reg1 =  mean_pred_riskscore_rp 
gen reg2 = laggedscore
gen reg3 = age 
gen reg4 = SEX
gen reg5 = industy_dummy1
gen reg6 = industy_dummy2
gen reg7 = industy_dummy3
gen reg8 = industy_dummy4 
gen reg9 = industy_dummy5
gen reg10 = industy_dummy6
gen reg11 = industy_dummy7
gen reg12 = industy_dummy8
gen reg13 = industy_dummy9
gen reg14 = industy_dummy10
gen reg15 = numpeople
probit exit_new reg* if year==2014, cluster(stderrorscluster) noconstant
forvalues i=2/17 {
scalar coeffprobit`i' = _b[reg`i']
}
*probit exit_new laggedscore mean_pred_riskscore_rp age SEX industy_dummy* numpeople if year==2014, cluster(stderrorscluster) noconstant
local n_probit = e(N)
matrix V = e(V)* `n_probit'
*ivprobit exit_new (laggedscore mean_pred_riskscore_rp = laggedscore_ORS mean_ORS_riskscore_rp) age SEX industy_dummy* numpeople if year==2014, cluster(stderrorscluster) 
*margins, dydx(*)
predict linear_index, xb
gen prob_leaving = normal(linear_index)
gen prob_leaving2 = prob_leaving^2
gen prob_leaving3 = prob_leaving^3
gen prob_leaving4 = prob_leaving^4
gen prob_leaving5 = prob_leaving^5
gen prob_leaving6 = prob_leaving^6 
tsset mbr_sys_id year
***** *****


*** descriptive statistics sample firm level
gen in2015_aux = (year==2015)
bys customer_number: egen in2015 = max(in2015_aux)
gen in2014_aux = (year==2014)
bys customer_number: egen in2014 = max(in2014_aux)
gen in2013_aux = (year==2013)
bys customer_number: egen in2013 = max(in2013_aux)
*/

* The flags below read in2013/in2014/in2015. Their only definition in this
* do-file sits inside the disabled block above, so they must already be stored
* in the loaded dataset. As a safeguard, rebuild any that are missing using the
* same logic as the disabled code: 1 for every row of a customer_number that
* has at least one observation in the given year, 0 otherwise.
foreach yr in 2013 2014 2015 {
    capture confirm variable in`yr'
    if _rc {
        tempvar seen_in_year
        gen `seen_in_year' = (year==`yr')
        bys customer_number: egen in`yr' = max(`seen_in_year')
    }
}

* Presence-pattern flags: 1 when the pattern holds, missing otherwise.
* Only v2014_2015 is carried through the collapse further down.
gen v2013_2014 = 1 if in2013==1 & in2014==1 & in2015!=1
gen v2014_2015 = 1 if in2013!=1 & in2014==1 & in2015==1
gen v2013_2014_2015 = 1 if in2013==1 & in2014==1 & in2015==1
gen v2013_2015 = 1 if in2013==1 & in2014!=1 & in2015==1


*** Firm-year panel and premium regressions
** Table file named by the authors: 0_November2018_Table6_Paper.tex

* Average every listed variable within each customer_number-year cell.
local cell_means mean_premium mean_pred_riskscore_rp paid_sum allowed_sum v2014_2015
collapse (mean) `cell_means', by(customer_number year)

* One indicator per distinct year value: yeardum1, yeardum2, ...
tabulate year, generate(yeardum)

** FOR ALL SAMPLE

* Both regressions share the same outcome and regressors.
local spec mean_premium mean_pred_riskscore_rp yeardum*

* customer_number absorbed as a fixed effect; SEs clustered on customer_number and year.
reghdfe `spec', absorb(customer_number) vce(cluster customer_number year)

* No absorbed fixed effects; SEs clustered on customer_number only.
reghdfe `spec', noabsorb vce(cluster customer_number)
